# Loading required R packages
library(ggplot2)
library(plotly)
library(shiny)
library(gridExtra)
library(xlsx)
library(MASS)
library(sf)
library(akima)
library(scales)
library(seriation)
library(dplyr)
library(crosstalk)
library(GGally)
library(tm)
library(wordcloud)
library(RColorBrewer)
library(htmltools)
library(tourr)
library(reshape)
library(ggraph)
library(igraph)
library(visNetwork)
library(data.table)
library(reshape2)
library(tibble)
get_outliers <- function(x){
quantile_values = quantile(x, probs = c(0.25, 0.75))
q1 = quantile_values["25%"]
q3 = quantile_values["75%"]
return(c(which((x > (q3+1.5*(q3-q1)))), which(x < (q1-1.5*(q3-q1)))))
}
baseball_scaled <- scale(baseball_data[,3:length(baseball_data)])
distance_matrix <- dist(baseball_scaled, method = "euclidean")
mds_result <- isoMDS(distance_matrix, k=2, p=2)
## initial value 19.856833
## iter 5 value 16.319153
## iter 10 value 16.046215
## final value 15.935476
## converged
coords <- mds_result$points
coords_mds <- as.data.frame(coords)
baseball_data_with_mds <- baseball_data
baseball_data_with_mds$MDS_V1 <- coords_mds$V1
baseball_data_with_mds$MDS_V2 <- coords_mds$V2
mtcars.pca <- prcomp(mtcars[,c(1:7,10,11)], center = TRUE,scale. = TRUE)
biplot(mtcars.pca, scale = 0)
#projection = list(type = "mercator"))
#projection = list(type = "albers usa"))
#projection = list(type = "equirectangular"))
#projection = list(type = "conic equal area"))
#projection = list(type = "azimuthal equal area"))
#projection = list(type = "equirectangular"))
#projection = list(type = "orthographic"))
#animation_opts(500, easing = 'linear', redraw = F)
# animation_opts(500, easing = 'quad', redraw = F)
# animation_opts(500, easing = 'cubic', redraw = F)
# animation_opts(500, easing = 'sin', redraw = F)
# animation_opts(500, easing = 'exp', redraw = F)
# animation_opts(500, easing = 'circle', redraw = F)
# animation_opts(500, easing = 'elastic', redraw = F)
# animation_opts(500, easing = 'back', redraw = F)
# animation_opts(500, easing = 'bounce', redraw = F)
# animation_opts(500, easing = 'linear-in', redraw = F)
# animation_opts(500, easing = 'quad-in', redraw = F)
# animation_opts(500, easing = 'cubic-in', redraw = F)
# animation_opts(500, easing = 'sin-in', redraw = F)
# animation_opts(500, easing = 'exp-in', redraw = F)
# animation_opts(500, easing = 'circle-in', redraw = F)
# animation_opts(500, easing = 'elastic-in', redraw = F)
# animation_opts(500, easing = 'back-in', redraw = F)
# animation_opts(500, easing = 'bounce-in', redraw = F)
# animation_opts(500, easing = 'linear-out', redraw = F)
# animation_opts(500, easing = 'quad-out', redraw = F)
# animation_opts(500, easing = 'cubic-out', redraw = F)
# animation_opts(500, easing = 'sin-out', redraw = F)
# animation_opts(500, easing = 'exp-out', redraw = F)
# animation_opts(500, easing = 'circle-out', redraw = F)
# animation_opts(500, easing = 'elastic-out', redraw = F)
# animation_opts(500, easing = 'back-out', redraw = F)
# animation_opts(500, easing = 'bounce-out', redraw = F)
# animation_opts(500, easing = 'linear-in-out', redraw = F)
# animation_opts(500, easing = 'quad-in-out', redraw = F)
# animation_opts(500, easing = 'cubic-in-out', redraw = F)
# animation_opts(500, easing = 'sin-in-out', redraw = F)
# animation_opts(500, easing = 'exp-in-out', redraw = F)
# animation_opts(500, easing = 'circle-in-out', redraw = F)
# animation_opts(500, easing = 'elastic-in-out', redraw = F)
# animation_opts(500, easing = 'back-in-out', redraw = F)
# animation_opts(500, easing = 'bounce-in-out', redraw = F)
mtcars <- as.data.table(mtcars)
mtcars <- mtcars[order(-mpg, cyl)]
head(mtcars, 10)
## mpg cyl disp hp drat wt qsec vs am gear carb
## 1: 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## 2: 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## 3: 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## 4: 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## 5: 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## 6: 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## 7: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 8: 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 9: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10: 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
# The palette with grey:
default_colors <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442",
"#0072B2", "#D55E00", "#CC79A7")
# ggplot2
ggplot(data=iris, aes(x= Sepal.Length, y = Petal.Length, color = Species)) + geom_point() + scale_fill_manual(values=default_colors) + ggtitle("Custom Color for ggplot2")
# plotly
plot_ly(data = iris, x = ~Sepal.Length, y = ~Petal.Length, color = ~Species, colors = default_colors) %>% layout(title = "Custom color for plotly")
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode
# ggplot2
ggplot(olive_data) +
geom_point(aes(x = oleic, y = palmitic, color=cut_interval(olive_data$linolenic, n = 4))) +
ggtitle("Dependence of Palmitic vs Oleic vs Linolenic") +
scale_colour_manual(values=c("brown", "azure", "green", "red"))
# plotly
plot_ly(iris, x = ~Petal.Length, y = ~Petal.Width,
type="scatter", mode = "markers" , color = ~Species,
colors = c("grey50", "blue", "red"), marker=list( size=20 , opacity=0.5))
density_plot_infection_risk = ggplot(senic_data) +
ggtitle("Density plot of Infection_Risk") +
geom_density(aes(x=Infection_Risk), fill = "lightblue") +
geom_point(data=senic_data[get_outliers(senic_data$Infection_Risk),],
aes(x=Infection_Risk, y=0, colour="Outliers"),
shape=23, size=2, fill="red") +
scale_color_manual(values = c("darkblue","black")) +
labs(colour="Legend") +
theme(plot.title = element_text(hjust = 0.5), legend.position = "right")
density_plot_infection_risk
ggplotly(p=density_plot_infection_risk)
outliers = senic_data[get_outliers(senic_data$Infection_Risk),c("Infection_Risk")]
senic_data$zero = 0
plot_ly(senic_data, x=~Infection_Risk) %>%
add_histogram(name="Histogram count") %>%
filter(is.element(Infection_Risk, outliers)) %>%
add_markers(x=~Infection_Risk,y=~zero, name="Outliers",
marker=list(symbol="diamond", size=10, line = list(color="black", width=1))) %>%
layout(title="Histogram of Infection_Risk", yaxis=list(title="Count"))
ggplot(senic_data) + geom_point(aes(x=Number_of_Nurses, y=Infection_Risk, color=Number_of_Beds)) +
ggtitle("Scatterplot of Infection_Risk vs Number_of_Nurses") +
theme(plot.title = element_text(hjust = 0.5))
ggplot(olive_data) +
geom_point(aes(x = oleic, y = palmitic,
color=cut_interval(olive_data$linolenic, n = 4))) +
ggtitle("Dependence of Palmitic vs Oleic vs Linolenic") +
theme(plot.title = element_text(hjust = 0.5)) +
labs(color = 'Linolenic range')
ggplot(olive_data) + geom_point(aes(x = oleic, y = palmitic, size = cut_interval(linolenic, n = 4))) +
ggtitle("Dependence of Palmitic vs Oleic vs Linolenic") +
theme(plot.title = element_text(hjust = 0.5)) +
scale_size_manual(name = "Linolenic range", values = c(1, 2, 3, 4))
# Pre-processing - Setting angle values based on category
olive_data$linolenic_class <- cut_interval(olive_data$linolenic, n = 4)
levels(olive_data$linolenic_class) <- (0:3) * (pi/4)
olive_data$linolenic_class <- as.numeric(as.character(olive_data$linolenic_class))
ggplot(olive_data, aes(x=oleic, y=palmitic)) + geom_point() +
geom_spoke(aes(angle = olive_data$linolenic_class), radius=40) +
ggtitle("Dependence of Palmitic vs Oleic vs Linolenic
Legend
Orientation angle of spoke : Linolenic class
0:(0,18.5], 45:(18.5,37], 90:(37,55.5], 135:(0,18.5] ") +
theme(plot.title = element_text(hjust = 0.5))
ggplot(olive_data)+
geom_point(aes(x = oleic, y = eicosenoic, color = cut_interval(linoleic, n = 3),
shape = cut_interval(palmitic, n = 3),
size = cut_interval(palmitoleic, n = 3))) +
scale_size_manual(values = c(2,3,4)) +
labs(shape = "Palmitic range", color = "Linoleic range", size = "Palmitoleic range") +
ggtitle("Dependence of Oleic vs Eicosenoic vs Linoleic vs Palmitic vs Palmitoleic") +
theme(plot.title = element_text(hjust = 0.5))
baseball_scaled <- scale(baseball_data[,3:length(baseball_data)])
### Distance Matrix between rows
distance_matrix <- dist(baseball_scaled, method = "euclidean")
### Non-metric MDS
mds_result <- isoMDS(distance_matrix, k=2, p=2)
## initial value 19.856833
## iter 5 value 16.319153
## iter 10 value 16.046215
## final value 15.935476
## converged
coords <- mds_result$points
coords_mds <- as.data.frame(coords)
baseball_data_with_mds <- baseball_data
baseball_data_with_mds$MDS_V1 <- coords_mds$V1
baseball_data_with_mds$MDS_V2 <- coords_mds$V2
plot_ly(baseball_data_with_mds, x=~MDS_V1, y=~MDS_V2) %>%
add_markers(color=~League, colors = c("blue", "red"),
text=baseball_data_with_mds$Team, hoverinfo="text") %>%
layout(title="Scatterplot of the 2 MDS variables")
baseball_scaled <- scale(baseball_data[,3:length(baseball_data)])
### Distance Matrix between rows
distance_matrix <- dist(baseball_scaled, method = "euclidean")
mds_result <- isoMDS(distance_matrix, k=2, p=2)
## initial value 19.856833
## iter 5 value 16.319153
## iter 10 value 16.046215
## final value 15.935476
## converged
coords <- mds_result$points
shp <- Shepard(distance_matrix, coords)
delta <- as.numeric(distance_matrix)
D <- as.numeric(dist(coords, method = "euclidean"))
n <- nrow(coords)
index <- matrix(1:n, nrow=n, ncol=n)
index1 <- as.numeric(index[lower.tri(index)])
n <- nrow(coords)
index <- matrix(1:n, nrow=n, ncol=n, byrow = T)
index2 <- as.numeric(index[lower.tri(index)])
plot_ly()%>%
add_markers(x=~delta, y=~D, name="Observation pairs", hoverinfo = 'text',
text = ~paste('Obj 1: ',
rownames(baseball_data_with_mds)[index1],
'<br> Obj 2: ',
rownames(baseball_data_with_mds)[index2])) %>%
add_lines(x=~shp$x, y=~shp$yf, name="Isotonic regression line") %>%
layout(title="Shepard's plot of MDS operation")
plot_ly(olive_data,labels=~Area,type='pie',textinfo = "none") %>%
layout(title = "Pie chart of proportion of oils coming from different areas")
ggplot(olive_data)+geom_density_2d(aes(x=eicosenoic, y=linoleic, colour=as.factor(Region))) +
ggtitle("Contour plot of Linoleic vs Eicosenoic") +
theme(plot.title = element_text(hjust = 0.5))
plot_mapbox(aegypti_data[aegypti_data$YEAR == 2004,], lat = ~Y, lon = ~X) %>%
add_markers(color = ~VECTOR, hoverinfo = "text",
text = ~paste(COUNTRY), alpha = 0.7) %>%
layout(title = "Dot map of mosquito distribution in the world (2004)")
plot_geo(lat = c(40.7127, 51.5072), lon = c(-74.0059, 0.1275)) %>%
add_lines(color = I("blue"), size = I(2)) %>%
layout(
title = 'London to NYC Great Circle',
showlegend = FALSE,
geo = list(
resolution = 50,
showland = TRUE,
showlakes = TRUE,
landcolor = toRGB("grey80"),
countrycolor = toRGB("grey80"),
lakecolor = toRGB("white"),
projection = list(type = "equirectangular"),
coastlinewidth = 2,
lataxis = list(
range = c(20, 60),
showgrid = TRUE,
tickmode = "linear",
dtick = 10
),
lonaxis = list(
range = c(-100, 20),
showgrid = TRUE,
tickmode = "linear",
dtick = 20
)
)
)
# Data aggregation
country_aggregate = aggregate(aegypti_data[,c("COUNTRY", "COUNTRY_ID")],
by = list(aegypti_data$COUNTRY, aegypti_data$COUNTRY_ID), FUN=length)
country_aggregate$COUNTRY = NULL
colnames(country_aggregate) = c("COUNTRY", "COUNTRY_ID", "Count")
plot_geo(country_aggregate) %>% add_trace(
z = ~Count,
text = ~COUNTRY, locations = ~COUNTRY_ID) %>%
layout(title = "Choropleth plot of number of mosquitoes",
geo = list(projection = list(type = "equirectangular")))
plot_geo(country_aggregate) %>%
add_trace(
z = ~log(Count) ,
text = ~paste(COUNTRY, "\n Count: ", Count), locations = ~COUNTRY_ID,
hoverinfo = "text"
) %>%
layout(
title = "Choropleth plot of number of mosquitoes",
geo = list(projection = list(type = "equirectangular")))
plot_geo(country_aggregate) %>%
add_trace(
z = ~log(Count) ,
text = ~paste(COUNTRY, "\n Count: ", Count), locations = ~COUNTRY_ID
) %>%
layout(
title = "Choropleth plot of number of mosquitoes",
geo = list(projection = list(type = "conic equal area")))
swe_data = read.csv("000000KD.csv")
swe_data_processed = data.frame(region = unique(swe_data$region))
swe_data_split = split(swe_data, swe_data$age)
for (i in seq_along(swe_data_split)) {
swe_data_processed[[names(swe_data_split)[i]]] = merge(swe_data_split[[i]],
swe_data_processed$region,
by.x = 'region',
by.y = 1, all = T)$X2016
}
colnames(swe_data_processed) = c("region", "Young", "Adult", "Senior")
swe_data_processed$region = gsub(" county", "", swe_data_processed$region)
swe_data_processed$region = gsub("\\d{2} ", "", swe_data_processed$region)
swe_data_processed$region = gsub("Örebro", "Orebro", swe_data_processed$region)
rownames(swe_data_processed) = swe_data_processed$region
rds = readRDS('gadm36_SWE_1_sf.rds')
rds$Young = swe_data_processed[rds$NAME_1, "Young"]
plot_ly() %>% add_sf(data = rds, split = ~NAME_1,
color = ~Young, showlegend = F, alpha = 1) %>%
layout(title = "Choropleth plot of mean income of Young age group")